In [62]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import feather
import plotly.express as px
import psutil
import libpysal as lp
import esda
import rasterio as rio
import contextily as ctx
import shapely.geometry as geom
import mapclassify as mc
%matplotlib inline
In [63]:
# move from the notebook directory into the sibling 'data' directory
os.chdir(os.path.join('..', 'data'))
os.getcwd()  # display the resulting working directory
Out[63]:
'C:\\Users\\tpytsui\\Documents\\Surfdrive\\Documents\\_PhD\\_github\\lma_circular-maker-city\\data'

ESDA - flows categorized by industry

Preparing dataset

Here we prepare a geodataframe for the ESDA. The dataframe should contain the following columns:

  • postcode
  • log(kg)
  • material code (consisting of gnc / esv code)
  • material description
  • geometry
In [70]:
# load the validated 2019-2020 flows and keep only the columns needed for the ESDA
df = feather.read_dataframe('lma/af_2019-2020_valid.feather')
df = df[['eaPostcode', 'esvChap',
       'esvChapDesc', 'gncChap', 'gncChapDesc', 'vmc', 'vmcDesc',
       'sbiSec', 'sbiSecDesc', 'kg']]
df['eaPostcode'] = df.eaPostcode.str[:4]  # reduce to 4-digit postcode (PC4)

# material column: GNC chapter when available, otherwise fall back to the ESV chapter.
# vectorized with np.where instead of a row-wise apply (much faster, same result)
df['mat'] = np.where(df.gncChap == '--', 'ESV-' + df.esvChap, 'GNC-' + df.gncChap)

# display the share of weight with multiple / unknown / matched sbi codes
mul = df[df.sbiSec == 'mul'].kg.sum()
unk = df[df.sbiSec == '--'].kg.sum()
tot = df.kg.sum()
print('% kg with multiple sbis: {}\n% kg with unknown sbi(s): {}\n% kg with matched sbis: {}'.format(
    round(mul/tot*100,2), round(unk/tot*100,2), round(100-((mul+unk)/tot*100),2)))
df = df.rename(columns={'eaPostcode': 'PC4'})  # reassignment instead of inplace=True

# drop invalid rows - where sbiSec == 'mul' or sbiSec == '--'
df = df[(df.sbiSec != 'mul') & (df.sbiSec != '--')]
print('invalid rows dropped.')
print(df.shape)
df.head()
% kg with multiple sbis: 2.95
% kg with unknown sbi(s): 10.94
% kg with matched sbis: 86.11
invalid rows dropped.
(2132, 11)
Out[70]:
PC4 esvChap esvChapDesc gncChap gncChapDesc vmc vmcDesc sbiSec sbiSecDesc kg mat
0 5301 12.6 Mineral wastes: Soils -- -- C04 Immobiliseren voor hergebruik F Bouwnijverheid 980340 ESV-12.6
1 9222 12.6 Mineral wastes: Soils -- -- B05 Overig inzetten als grondstof F Bouwnijverheid 15860 ESV-12.6
2 9243 12.1 Mineral wastes: Construction and demolition wa... -- -- B05 Overig inzetten als grondstof F Bouwnijverheid 686240 ESV-12.1
3 9243 12.1 Mineral wastes: Construction and demolition wa... -- -- B05 Overig inzetten als grondstof A Landbouw, bosbouw en visserij 121360 ESV-12.1
4 9243 12.1 Mineral wastes: Construction and demolition wa... -- -- B05 Overig inzetten als grondstof A Landbouw, bosbouw en visserij 230220 ESV-12.1
In [71]:
# top 5 industry flows by total weight received.
# select the 'kg' column BEFORE summing, so pandas never tries to aggregate the
# non-numeric description columns (an error in pandas >= 2.0)
df.groupby(['sbiSec', 'sbiSecDesc'])['kg'].sum().reset_index().sort_values('kg', ascending=False).head()
Out[71]:
sbiSec sbiSecDesc kg
2 C Industrie 596957827
5 F Bouwnijverheid 276110290
6 G Groot- en detailhandel; reparatie van auto’s 213751824
4 E Winning en distributie van water; afval- en af... 185021288
0 A Landbouw, bosbouw en visserij 114780078
In [72]:
# identify top n industries by weight (comment previously said "materials")
n = 10
# 'kg' selected before .sum() so non-numeric columns are never aggregated
topInd = (df.groupby(['sbiSec', 'sbiSecDesc'])['kg'].sum()
            .reset_index().sort_values('kg', ascending=False).head(n))

# pie chart
fig = px.pie(topInd, values='kg', names='sbiSecDesc', title='Top {} industries by weight'.format(n))
fig.show()

# split the ranked sbiSec codes into two halves for the 5-column plot grids below
topInd = list(topInd.sbiSec)
top5 = topInd[:5]    # ranks 1-5
top10 = topInd[5:]   # ranks 6-10 (NOTE: misleading name - this holds only the *second* five)

print('top {} industries by weight: {}'.format(n, topInd))
top 10 industries by weight: ['C', 'F', 'G', 'E', 'A', 'K', 'D', 'H', 'M', 'O']
In [73]:
# sanity check: confirm df still has the expected columns after the prep above
df.head()
Out[73]:
PC4 esvChap esvChapDesc gncChap gncChapDesc vmc vmcDesc sbiSec sbiSecDesc kg mat
0 5301 12.6 Mineral wastes: Soils -- -- C04 Immobiliseren voor hergebruik F Bouwnijverheid 980340 ESV-12.6
1 9222 12.6 Mineral wastes: Soils -- -- B05 Overig inzetten als grondstof F Bouwnijverheid 15860 ESV-12.6
2 9243 12.1 Mineral wastes: Construction and demolition wa... -- -- B05 Overig inzetten als grondstof F Bouwnijverheid 686240 ESV-12.1
3 9243 12.1 Mineral wastes: Construction and demolition wa... -- -- B05 Overig inzetten als grondstof A Landbouw, bosbouw en visserij 121360 ESV-12.1
4 9243 12.1 Mineral wastes: Construction and demolition wa... -- -- B05 Overig inzetten als grondstof A Landbouw, bosbouw en visserij 230220 ESV-12.1
In [74]:
# separate gdf for each industry type in topInd
indFlows = dict.fromkeys(topInd, None)

# read pc4 polygons for geolocating the flows
pc4 = gpd.read_file('spatial-data/nl_pc4.shp')
pc4 = pc4[['PC4', 'geometry']]

def safe_log(x):
    """Natural log for positive values; 0 otherwise.

    NOTE(review): this maps both kg == 0 and kg == 1 to logn == 0, so later
    filters on `logn != 0` also drop kg == 1 rows - confirm that is intended.
    """
    return np.log(x) if x > 0 else 0

# for each industry, make a gdf with ALL postcodes, where logn = log(kg received)
for i in topInd:
    # total kg received per postcode for this industry
    temp = df[df.sbiSec == i][['PC4', 'sbiSec', 'kg']]
    temp = temp.groupby(['PC4', 'sbiSec'])['kg'].sum().reset_index()

    # left-join onto the full postcode set so postcodes with no flows appear with kg = 0
    temp = gpd.GeoDataFrame(pd.merge(pc4, temp, how='left', on='PC4'))
    temp['kg'] = temp.kg.fillna(0)          # reassignment instead of chained inplace fillna
    temp['sbiSec'] = temp.sbiSec.fillna('--')

    # log-transform (helper defined once above, not redefined every iteration)
    temp['logn'] = temp.kg.map(safe_log)

    # add the gdf to the indFlows dictionary
    indFlows[i] = temp

Data at first glance (a-spatial)

Here we take a first glance of the data by creating histograms. The three figures below show the distribution of values (kg received) throughout the dataset for each type of industry flow. This step doesn't say anything about the spatial relationships - we will explore in the rest of this notebook.

In [75]:
# histograms per industry: raw kg (top row) and log(kg) (bottom row).
# the two previously copy-pasted loops are merged into one nested loop
for top in [top5, top10]: 
    
    fig, ax = plt.subplots(2,5, figsize=(30,10))

    for r, col in enumerate(['kg', 'logn']):
        for i, v in enumerate(top):
            temp = indFlows[v]              # per-industry gdf
            temp = temp[temp.kg != 0]       # zero-kg postcodes would dominate the histogram
            sn.histplot(data=temp, x=col, bins=30, ax=ax[r,i])
            if r == 0:                      # titles only on the top row, as before
                ax[r,i].set_title('# postcodes for {}'.format(v))

    plt.show()

Like the total kg received, the kg received per industry also follows a log distribution, meaning that there are lots of postcodes receiving a low kg of waste, and a few postcodes receiving high amounts of waste. However, after log-transformation, the manufacturing industry doesn't seem to follow a normal distribution very well. This could be because there aren't a lot of postcodes associated with the manufacturing industry in the first place. The construction and agriculture industries seem to follow a normal distribution pretty well.

I'm also trying to understand whether to choose a log transformation of base 10 or base e (natural log). Both log transformations result in the exact same shape, but the scale on the x-axis is different. This is expected: log10(x) = ln(x)/ln(10), so the two transformations differ only by a constant factor - a pure rescaling of the x-axis that cannot change the shape of the distribution.

Mapping

Here we map out the kg and log(kg) of secondary resources received for each industry type. Some observations of the maps:

  • the manufacturing industry receivers seem to be almost randomly spatially distributed across the Netherlands. This is strange, because you would expect more clusters around the major industrial areas in Rotterdam or Amsterdam. A possible explanation is that there were many manufacturing industry flows that were not identified because of the poor sbi code matching process.
  • in fact, there doesn't seem to be clusters in any of the industries. You would also expect the agri-industry receivers to be clustered around the groene-hart or other agricultural-heavy areas, but that doesn't seem to be the case. Another possible explanation is that we are capturing the headquarters of the receivers, rather than the actual location of reuse. More discussion with Tjerk is needed to understand this further
In [76]:
# make pc2 (2-digit postcode) geodataframe - coarser polygons make the basemap cheap to plot
pc2 = gpd.read_file('spatial-data/nl_pc2.shp')
In [77]:
# choropleth of log(kg) received per industry, over a light-grey pc2 basemap
for half in [top5, top10]:
    fig, axes = plt.subplots(1, 5, figsize=(8 * 5, 7))

    for axis, ind in zip(axes, half):
        flows = indFlows[ind]
        flows = flows[flows.kg != 0]  # remove zero values
        pc2.plot(ax=axis, color='#f0f0f0')
        flows.plot(ax=axis, column='logn', cmap='OrRd', legend=True)
        axis.axis('off')
        axis.set_title(ind + ' (log values)')

    plt.show()

Zero values

Another aspect of the data that we need to consider are the zero values. 60% of postcodes in the Netherlands don't receive any secondary resources - this is highlighted in red in the map below. That is a lot of datapoints that will skew the data - so we need to think about whether to include the postcodes with zero kg received in our analysis later. For now, I've kept the zero values, because I think it's important to take these postcodes into account.

In [78]:
# map the postcodes that received nothing (kg == 0) for each industry
for half in [top5, top10]:
    fig, axes = plt.subplots(1, 5, figsize=(5 * 5, 8))

    for axis, ind in zip(axes, half):
        zeros = indFlows[ind]
        zeros = zeros[zeros.kg == 0]  # pick out zero values
        pc2.plot(ax=axis, color='#f0f0f0')
        zeros.plot(ax=axis, color='red')
        axis.axis('off')
        share = round(len(zeros) / len(pc4) * 100, 2)  # share of all pc4 polygons
        axis.set_title('{} zero values ({}%)'.format(ind, share))

    plt.show()

Global spatial auto-correlation

Before conducting any spatial analysis, it's important to find out whether the data has any relationship to space - does the data follow any kind of spatial pattern, or are the values just randomly distributed through space? Although we can answer this question from just looking at the map and trying to detect patterns with our eyes, we can also use spatial statistics to get a more accurate understanding of the data.

In order to do this, we look at the dataset and ask the question: 'If we threw values (kg) randomly on the map in simulations, how likely would these simulations resemble our actual data?' or in other words, 'what is the probability that our data (on kg received per pc) is randomly distributed in space?'

If we find that our data is not randomly distributed in space, we can assume that our data is somehow affected by geographical space, and proceed with our spatial analysis. However, if we find that our data is randomly distributed in space, there is no point in conducting further spatial analysis. In this case, we will have to further separate the data (by material type, for example) until we get a dataset that isn't randomly distributed in space.

Spatial weights, spatial lag

Before we answer the questions above, we first need to find out for each postcode who its neighbors are, and define what we mean by 'neighbors'. For our analysis, we define neighbors as any postcodes that share an edge or a vertex (point). This definition is called a 'queen spatial weight', in reference to the queen chess piece.

Then, we need to describe, for each postcode, the conditions of its neighborhood. We do this by using a process of calculation called spatial lag. Here, for each postcode, we identify its neighbors, then take an average of all their values. This average neighbors' value is then attributed to the postcode. You can see this process in the maps below - the map on the left shows the actual kg received for each post code. The map on the right shows the average kg received of each postcode by its neighbors (and not itself).

This allows us to understand whether the postcodes in our data have relationship to its spatial neighbors. For example, do postcodes with high values tend have neighbors with high values as well? Or is it just random, where some high value postcodes have high value neighbors, while others don't?

In [79]:
# calculate spatial weights of NL (they are the same for all industry flows, since they are all in NL)
# NOTE(review): this emits island warnings (pcs 3478, 3611 have no contiguous neighbor);
# the islands are dropped further down before the Local Moran's I computations
wq =  lp.weights.Queen.from_dataframe(pc4) # queen spatial weights - for each pc, identify neighbors (other pcs that share a vertex or edge)
wq.transform = 'r' # weights get row normalized, so when you sum the row, it equals 1
('WARNING: ', 3478, ' is an island (no neighbors)')
('WARNING: ', 3611, ' is an island (no neighbors)')
C:\Users\tpytsui\Miniconda3\lib\site-packages\libpysal\weights\weights.py:172: UserWarning:

The weights matrix is not fully connected: 
 There are 7 disconnected components.
 There are 2 islands with ids: 3478, 3611.

As seen above, the pc4 shpfile has two islands. These postcodes don't have direct neighbors (aka don't share an edge or vertex with another postcode). This will pose problems further down the notebook - calculating local moran's I requires all elements to have at least one neighbor. For now, I solve the problem by just dropping these two postcodes just before I do the Local Moran's I calculations, but there are better solutions:

  • assign the nearest postcode as the neighbor of these islands, even if this pc doesn't share an edge or vertex
  • use distance-based spatial weights instead of queen spatial weights
  • for more details, see book on geospatial data science for python
In [80]:
# calculate the spatial lag of kg for each industry, then log-transform the lag
for i in topInd: 
    temp = indFlows[i].copy()

    # spatial lag of raw kg (NOT of logn): average the neighbors' *kg* first and
    # log-transform that average - averaging log values would not be meaningful
    ylag = lp.weights.lag_spatial(wq, temp['kg'])

    # natural-log transform the lag; 0 stays 0 (same convention as 'logn').
    # assigned directly, instead of first storing the raw lag in 'lag_logn'
    # and overwriting it (which was confusing), and without redefining the
    # helper function on every loop iteration
    temp['lag_logn'] = [np.log(v) if v != 0 else 0 for v in ylag]

    # assign the updated gdf back into the indFlows dictionary
    indFlows[i] = temp

important: you cannot directly calculate the average of log values. You have to calculate the average of kg values, then log transform that average. otherwise it is not meaningful. It would be good to check with Alexis (statistics prof) with this.

In [81]:
for top in [top5, top10]:
    fig, ax = plt.subplots(2,5, figsize=(5*5,8))

    for c, col in enumerate(['logn', 'lag_logn']):    
        for i, v in enumerate(top): 
            temp = indFlows[v]
            temp.plot(column=col, ax=ax[c][i], legend=True, cmap='OrRd')
            ax[c][i].axis('off')
            ax[c][i].set_title(col + ' kg received for ' + v)

    plt.show()

the spatial lag patterns for some of the material types seem a little strange - they look like misshapen doughnuts. This is a result of a high value surrounded by low or zero values. Perhaps in this case it would be more meaningful to calculate the distance-based spatial lag, rather than queen spatial lag.

Moran's I: is our data randomly distributed in space?

Although these p-values in the previous paragraph are impressive, it isn't accurate because we've simplified the data into binary values, losing a lot of information in the process. In order to get a more accurate understanding, we will do the same process on our dataset with continuous values. However, with continuous values, we cannot use join counts to analyse the data, because there are an infinite number of join types between neighbors. So instead, we use another method to measure spatial clustering - Moran's I. (Here's a helpful video explaining it)

Moran's I ranges from -1 to 1. 1 means that the data is perfectly clustered; 0 means perfectly random; and -1 means perfectly dispersed.

In [82]:
from IPython.display import Image
# show a static illustration of the Moran's I formula (not computed here)
print('Moran\'s I')
Image("../images/morans_i.png", width=600)
Moran's I
Out[82]:
In [83]:
# dictionary of global Moran's I per industry: {sbiSec: [Moran object, rounded I]}
matMI = dict.fromkeys(topInd, None)

# calculate Moran's I (esda uses 999 permutations by default)
for v in topInd:  # the enumerate index was unused
    temp = indFlows[v]
    y = temp['logn']
    np.random.seed(12345)  # reseed each time so every industry gets the same permutation sequence
    mi = esda.moran.Moran(y, wq)
    matMI[v] = [mi, round(mi.I, 2)]
    print('Moran\'s I for {}: {}'.format(v, round(mi.I, 2)))

# histplot of Moran's I values - DataFrame built directly from the rounded values
# instead of the earlier transpose/rename/drop sequence
print('')
mis = pd.DataFrame({'morans_I': {k: v[1] for k, v in matMI.items()}})
sn.histplot(data=mis, x='morans_I', bins=10)
plt.show()
Moran's I for C: 0.03
Moran's I for F: 0.05
Moran's I for G: 0.02
Moran's I for E: 0.0
Moran's I for A: 0.05
Moran's I for K: 0.02
Moran's I for D: -0.0
Moran's I for H: -0.01
Moran's I for M: -0.0
Moran's I for O: -0.0

The Moran's Is for the industry flows range between -0.01 and 0.05 (see the printed values above), which seems quite low. However, Moran's I cannot be interpreted at face value, because it is an inferential statistic (at least according to this website). We can only interpret this number by comparing it to the moran's I of random simulations, similar to the previous process with the binary data. In the figure below, the blue curve shows the distribution of Moran's I values in 999 simulations, the black line shows the average Moran's I value of the simulations, and the red line shows the Moran's I of our actual data.

In the figures below, we can see that the log(kg) values have a statistically significant spatial clustering (p-value = 0.001), while the spatial clustering of the normal (kg) values are not statistically significant (p-value = 0.088 > 0.05). This means that it would be worth conducting spatial analysis on the log(kg) values, but not on the normal kg values. We should also explore further the spatial clustering of different material types to see if those are statistically significant as well.

In [87]:
# inspect the stored Moran object for industry C (element 0 of the [Moran, rounded I] pair)
matMI['C'][0]
Out[87]:
<esda.moran.Moran at 0x206dceaed60>
In [88]:
# reference distribution per industry: kde of Moran's I over the 999 permutations,
# with the simulated mean (black line) and the observed I (red line) marked
for half in [top5, top10]:
    fig, axes = plt.subplots(1, 5, figsize=(6 * 5, 5))

    for axis, ind in zip(axes, half):
        moran = matMI[ind][0]
        sn.kdeplot(moran.sim, shade=True, ax=axis)  # kde of simulated Moran's I values
        axis.vlines(moran.I, 0, 40, color='r')      # observed Moran's I
        axis.vlines(moran.EI, 0, 40)                # expected I under spatial randomness
        axis.set_xlabel("Moran's I")
        axis.set_title("{} (p-value: {})".format(ind, moran.p_sim))

    plt.show()

Although all p-values are lower than 0.05, there is a chance that the spatial distribution of the manufacturing industry receivers is random - the red line overlaps the blue curve in the second graph. This means that it might not be worth analyzing the manufacturing industry flows, because there isn't a strong spatial pattern here. Again, the next step is either to refine the sbi code matching method to allow more flows to match with manufacturing sbi codes; or to categorize the flows using materials rather than industry type.

Local spatial auto-correlation

The previous analysis explained how spatially clustered the data is as a whole, or its 'global' spatial auto-correlation. Now, we will try to understand the local neighborhood conditions of each postcode, and identify postcodes that are outliers. To get a better understanding of the data, we created a scatterplot that shows, for each postcode, the relationship between kg received and the average kg received by its neighbors. The figure on the right shows a scatterplot of log(kg) values.

The scatterplot can be understood by separating it into four quadrants (the black dotted lines):

  • q1 - hotspots: postcodes in the top right quadrant have high values, and have neighbors that have high values as well.
  • q2 - doughnuts: postcodes in the top left quadrant have low values, but are surrounded by neighbors with high values, like a doughnut - nothing in the middle, good stuff on the outside.
  • q3 - coldspots: postcodes in the bottom left quadrant have low values, and are surrounded by neighbors with low values as well.
  • q4 - diamonds: postcodes in the bottom right quadrant have high values, but are surrounded by neighbors with low values, like a diamond in the rough.
In [90]:
np.random.seed(12345)

for top in [top5, top10]:

    fig, ax = plt.subplots(1,5, figsize=(8*5, 8))

    for i,v in enumerate(top):

        # make df and remove zero values
        # NOTE(review): logn == 0 also matches kg == 1 postcodes, not only kg == 0 ones
        temp = indFlows[v]
        temp = temp[temp.logn != 0]

        # PREPARE VALUES
        # create values for the moran scatterplot of the log-transformed values
        kg = temp['logn']
        lag_kg = temp['lag_logn'] # log of average kg received by neighbors

        # polynomial of degree 1, aka draw a straight line that best fits the datapoints, where b, a is (y = bx + a)
        b, a = np.polyfit(kg, lag_kg, 1) 

        # PLOT
        # scatterplot of kg versus lagged kg
        ax[i].plot(kg, lag_kg, '.', color='lightblue')

        # red OLS line of best fit; its slope b comes from the polyfit above
        # (NOT from global Moran's I, which is only shown in the title)
        ax[i].plot(kg, b*kg+a, 'r')
        ax[i].set_title('Moran Scatterplot for {} (Mi = {})'.format(v, round(matMI[v][0].I,3)))
        ax[i].set_ylabel('Spatial Lag of log(kg)')
        ax[i].set_xlabel('log(kg)')

        # dashed lines at mean of the kg and lagged kg
        ax[i].vlines(kg.mean(), lag_kg.min(), lag_kg.max(), linestyle='--')
        ax[i].hlines(lag_kg.mean(), kg.min(), kg.max(), linestyle='--')

    plt.show()

There seems to be barely any correlation between the amount of waste received by each postcode, and the amount received by its neighbors. This suggests that the spatial pattern is weak, especially for the manufacturing industry. If there was a strong spatial pattern, there should be more correlation between how much a pc receives and how much is received by its neighbor. And a strong spatial pattern would mean that kg received by a postcode has something to do with its location.

In [91]:
# make spatial weight matrix with no islands 
pcIslands = list(pc4.iloc[wq.islands].PC4) # identify island pcs (no queen-contiguous neighbor)
pc4ni = pc4[~pc4.PC4.isin(pcIslands)]      # postcode frame without the islands
# NOTE(review): this rebinds wq - re-running any cell above this point afterwards
# will silently use the island-free weights instead of the original ones
wq =  lp.weights.Queen.from_dataframe(pc4ni) 
wq.transform = 'r'
C:\Users\tpytsui\Miniconda3\lib\site-packages\libpysal\weights\weights.py:172: UserWarning:

The weights matrix is not fully connected: 
 There are 5 disconnected components.

In [92]:
from matplotlib import colors

# LISA quadrant -> [quadrant code in li.q, plot color]; 'doughbuts' typo fixed
spotsDict = {
    'hotspots': [1, 'red'], 
    'coldspots': [3, 'blue'], 
    'doughnuts': [2, 'blue'], 
    'diamonds': [4, 'red']
}

def plotSpots(gdf, spotType, color, ax, title): 
    """Plot one LISA spot type: significant pcs in `color`, the rest in light grey.

    spotType is a boolean array; `spotType*1` casts it to 0/1 integers so it can
    index the two labels. geopandas assigns colormap colors to the sorted unique
    labels - the capitalised industry title sorts before 'n.sig.' and so takes
    the first colormap entry (`color`), answering the "how does it know the
    order?" question in the original comments. TODO confirm on the installed
    geopandas version.
    """
    spots = ['n.sig.', title]
    labels = [spots[i] for i in spotType*1]
    hmap = colors.ListedColormap([color, 'lightgrey'])
    gdf.assign(cl=labels).plot(column='cl', categorical=True, 
            k=2, cmap=hmap, linewidth=0.1, ax=ax, 
            edgecolor='white', legend=True, legend_kwds={'loc': 'lower left', 'bbox_to_anchor':(0.,0.,0.,0.)})
    ax.set_axis_off()
    ax.set_title(title)

for top in (top5, top10): 
    for s in list(spotsDict.keys()): 
        f,ax = plt.subplots(1,5,figsize=(6*5,8), subplot_kw=dict(aspect='equal'))
        for i, v in enumerate(top):
            # drop island postcodes - local Moran's I needs every pc to have a neighbor
            temp = indFlows[v]
            temp = temp[~temp.PC4.isin(pcIslands)]

            # local Moran's I; significance mask computed once (was computed twice before,
            # and n_sig was computed but never used)
            li = esda.moran.Moran_Local(temp['logn'], wq)
            sig = li.p_sim < 0.05

            # significant pcs in the requested quadrant: (sig * li.q) == code,
            # since non-significant entries are zeroed by the multiplication
            spot = sig * li.q == spotsDict[s][0]
            plotSpots(temp, spotType=spot, color=spotsDict[s][1], ax=ax[i], title=v + ' ' + s)

        plt.show()
C:\Users\tpytsui\Miniconda3\lib\site-packages\esda\moran.py:1054: RuntimeWarning:

invalid value encountered in true_divide

C:\Users\tpytsui\Miniconda3\lib\site-packages\esda\moran.py:1054: RuntimeWarning:

divide by zero encountered in true_divide

C:\Users\tpytsui\Miniconda3\lib\site-packages\esda\moran.py:1054: RuntimeWarning:

invalid value encountered in true_divide

C:\Users\tpytsui\Miniconda3\lib\site-packages\esda\moran.py:1054: RuntimeWarning:

divide by zero encountered in true_divide

In [93]:
from matplotlib import colors
# fixed color order: 0 ns (lightgrey), 1 hot (red), 2 doughnut (lightblue), 3 cold (blue), 4 diamond (pink)
hmap = colors.ListedColormap(['lightgrey', 'red', 'lightblue', 'blue', 'pink'])
spot_labels = ['0 ns', '1 hot spot', '2 doughnut', '3 cold spot', '4 diamond']

for top in [top5, top10]: 

    f, ax = plt.subplots(1,5, figsize=(6*5,8))

    for i, v in enumerate(top):
        # drop island pcs - local Moran's I requires every element to have a neighbor
        d = indFlows[v]
        d = d[~d.PC4.isin(pcIslands)]
        li = esda.moran.Moran_Local(d['logn'], wq)

        # classify each pc: significant quadrant code (1-4) or 0 for not significant.
        # (sig * li.q) zeroes out non-significant quadrants before comparison
        sig = 1 * (li.p_sim < 0.05)
        spots = (1 * (sig * li.q == 1) + 2 * (sig * li.q == 2)
                 + 3 * (sig * li.q == 3) + 4 * (sig * li.q == 4))
        # declare ALL five categories explicitly: with plain string labels the
        # colormap entries shift whenever a spot type is absent from a map,
        # coloring e.g. coldspots red. A fixed Categorical keeps colors stable.
        labels = pd.Categorical([spot_labels[code] for code in spots],
                                categories=spot_labels)

        # plot 
        d.assign(cl=labels).plot(column='cl', categorical=True,
                k=2, cmap=hmap, linewidth=0.1, ax=ax[i],
                edgecolor='white', legend=True, legend_kwds={'loc': 'lower left', 'bbox_to_anchor':(0.,0.,0.,0.)})
        ax[i].set_axis_off()
        ax[i].set_title(v)

    plt.show()
C:\Users\tpytsui\Miniconda3\lib\site-packages\esda\moran.py:1054: RuntimeWarning:

divide by zero encountered in true_divide

C:\Users\tpytsui\Miniconda3\lib\site-packages\esda\moran.py:1054: RuntimeWarning:

invalid value encountered in true_divide

Do the hotspots and coldspots for various industries seem to make sense?

Would be good to verify these hotspots on google maps. For example, if we see that a hotspot of the agriculture industry is right in the middle of an industrial area, we know that there is probably something wrong with the data. I'm asking this question because the hotspots for manufacturing industry receivers don't seem to be in major industrial areas.

In [ ]:
 
In [ ]: